home *** CD-ROM | disk | FTP | other *** search
/ Chip 2007 January, February, March & April / Chip-Cover-CD-2007-02.iso / Pakiet bezpieczenstwa / mini Pentoo LiveCD 2006.1 / mpentoo-2006.1.iso / livecd.squashfs / usr / lib / python2.4 / email / FeedParser.pyc (.txt) < prev    next >
Python Compiled Bytecode  |  2005-10-18  |  11KB  |  451 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.4)
  3.  
  4. """FeedParser - An email feed parser.
  5.  
  6. The feed parser implements an interface for incrementally parsing an email
  7. message, line by line.  This has advantages for certain applications, such as
  8. those reading email messages off a socket.
  9.  
  10. FeedParser.feed() is the primary interface for pushing new data into the
  11. parser.  It returns when there's nothing more it can do with the available
  12. data.  When you have no more data to push into the parser, call .close().
  13. This completes the parsing and returns the root message object.
  14.  
  15. The other advantage of this parser is that it will never throw a parsing
  16. exception.  Instead, when it finds something unexpected, it adds a 'defect' to
  17. the current message.  Defects are just instances that live on the message
  18. object's .defects attribute.
  19. """
  20. import re
  21. from email import Errors
  22. from email import Message
  23. NLCRE = re.compile('\r\n|\r|\n')
  24. NLCRE_bol = re.compile('(\r\n|\r|\n)')
  25. NLCRE_eol = re.compile('(\r\n|\r|\n)$')
  26. NLCRE_crack = re.compile('(\r\n|\r|\n)')
  27. headerRE = re.compile('^(From |[\\041-\\071\\073-\\176]{2,}:|[\\t ])')
  28. EMPTYSTRING = ''
  29. NL = '\n'
  30. NeedMoreData = object()
  31.  
  32. class BufferedSubFile(object):
  33.     '''A file-ish object that can have new data loaded into it.
  34.  
  35.     You can also push and pop line-matching predicates onto a stack.  When the
  36.     current predicate matches the current line, a false EOF response
  37.     (i.e. empty string) is returned instead.  This lets the parser adhere to a
  38.     simple abstraction -- it parses until EOF closes the current message.
  39.     '''
  40.     
  41.     def __init__(self):
  42.         self._partial = ''
  43.         self._lines = []
  44.         self._eofstack = []
  45.         self._closed = False
  46.  
  47.     
  48.     def push_eof_matcher(self, pred):
  49.         self._eofstack.append(pred)
  50.  
  51.     
  52.     def pop_eof_matcher(self):
  53.         return self._eofstack.pop()
  54.  
  55.     
  56.     def close(self):
  57.         self._lines.append(self._partial)
  58.         self._partial = ''
  59.         self._closed = True
  60.  
  61.     
  62.     def readline(self):
  63.         if not self._lines:
  64.             if self._closed:
  65.                 return ''
  66.             
  67.             return NeedMoreData
  68.         
  69.         line = self._lines.pop()
  70.         for ateof in self._eofstack[::-1]:
  71.             if ateof(line):
  72.                 self._lines.append(line)
  73.                 return ''
  74.                 continue
  75.         
  76.         return line
  77.  
  78.     
  79.     def unreadline(self, line):
  80.         if not line is not NeedMoreData:
  81.             raise AssertionError
  82.         self._lines.append(line)
  83.  
  84.     
  85.     def push(self, data):
  86.         '''Push some new data into this object.'''
  87.         data = self._partial + data
  88.         self._partial = ''
  89.         parts = NLCRE_crack.split(data)
  90.         self._partial = parts.pop()
  91.         lines = []
  92.         for i in range(len(parts) // 2):
  93.             lines.append(parts[i * 2] + parts[i * 2 + 1])
  94.         
  95.         self.pushlines(lines)
  96.  
  97.     
  98.     def pushlines(self, lines):
  99.         self._lines[:0] = lines[::-1]
  100.  
  101.     
  102.     def is_closed(self):
  103.         return self._closed
  104.  
  105.     
  106.     def __iter__(self):
  107.         return self
  108.  
  109.     
  110.     def next(self):
  111.         line = self.readline()
  112.         if line == '':
  113.             raise StopIteration
  114.         
  115.         return line
  116.  
  117.  
  118.  
  119. class FeedParser:
  120.     '''A feed-style parser of email.'''
  121.     
  122.     def __init__(self, _factory = Message.Message):
  123.         '''_factory is called with no arguments to create a new message obj'''
  124.         self._factory = _factory
  125.         self._input = BufferedSubFile()
  126.         self._msgstack = []
  127.         self._parse = self._parsegen().next
  128.         self._cur = None
  129.         self._last = None
  130.         self._headersonly = False
  131.  
  132.     
  133.     def _set_headersonly(self):
  134.         self._headersonly = True
  135.  
  136.     
  137.     def feed(self, data):
  138.         '''Push more data into the parser.'''
  139.         self._input.push(data)
  140.         self._call_parse()
  141.  
  142.     
  143.     def _call_parse(self):
  144.         
  145.         try:
  146.             self._parse()
  147.         except StopIteration:
  148.             pass
  149.  
  150.  
  151.     
  152.     def close(self):
  153.         '''Parse all remaining data and return the root message object.'''
  154.         self._input.close()
  155.         self._call_parse()
  156.         root = self._pop_message()
  157.         if not not (self._msgstack):
  158.             raise AssertionError
  159.         if root.get_content_maintype() == 'multipart' and not root.is_multipart():
  160.             root.defects.append(Errors.MultipartInvariantViolationDefect())
  161.         
  162.         return root
  163.  
  164.     
  165.     def _new_message(self):
  166.         msg = self._factory()
  167.         if self._cur and self._cur.get_content_type() == 'multipart/digest':
  168.             msg.set_default_type('message/rfc822')
  169.         
  170.         if self._msgstack:
  171.             self._msgstack[-1].attach(msg)
  172.         
  173.         self._msgstack.append(msg)
  174.         self._cur = msg
  175.         self._last = msg
  176.  
  177.     
  178.     def _pop_message(self):
  179.         retval = self._msgstack.pop()
  180.         if self._msgstack:
  181.             self._cur = self._msgstack[-1]
  182.         else:
  183.             self._cur = None
  184.         return retval
  185.  
  186.     
  187.     def _parsegen(self):
  188.         self._new_message()
  189.         headers = []
  190.         for line in self._input:
  191.             if line is NeedMoreData:
  192.                 yield NeedMoreData
  193.                 continue
  194.             
  195.             if not headerRE.match(line):
  196.                 if not NLCRE.match(line):
  197.                     self._input.unreadline(line)
  198.                 
  199.                 break
  200.             
  201.             headers.append(line)
  202.         
  203.         self._parse_headers(headers)
  204.         if self._headersonly:
  205.             lines = []
  206.             while True:
  207.                 line = self._input.readline()
  208.                 if line is NeedMoreData:
  209.                     yield NeedMoreData
  210.                     continue
  211.                 
  212.                 if line == '':
  213.                     break
  214.                 
  215.                 lines.append(line)
  216.             self._cur.set_payload(EMPTYSTRING.join(lines))
  217.             return None
  218.         
  219.         if self._cur.get_content_type() == 'message/delivery-status':
  220.             while True:
  221.                 self._input.push_eof_matcher(NLCRE.match)
  222.                 for retval in self._parsegen():
  223.                     if retval is NeedMoreData:
  224.                         yield NeedMoreData
  225.                         continue
  226.                     
  227.                     break
  228.                 
  229.                 msg = self._pop_message()
  230.                 self._input.pop_eof_matcher()
  231.                 while True:
  232.                     line = self._input.readline()
  233.                     if line is NeedMoreData:
  234.                         yield NeedMoreData
  235.                         continue
  236.                     
  237.                     break
  238.                 while True:
  239.                     line = self._input.readline()
  240.                     if line is NeedMoreData:
  241.                         yield NeedMoreData
  242.                         continue
  243.                     
  244.                     break
  245.                 if line == '':
  246.                     break
  247.                 
  248.                 self._input.unreadline(line)
  249.             return None
  250.         
  251.         if self._cur.get_content_maintype() == 'message':
  252.             for retval in self._parsegen():
  253.                 if retval is NeedMoreData:
  254.                     yield NeedMoreData
  255.                     continue
  256.                 
  257.                 break
  258.             
  259.             self._pop_message()
  260.             return None
  261.         
  262.         if self._cur.get_content_maintype() == 'multipart':
  263.             boundary = self._cur.get_boundary()
  264.             if boundary is None:
  265.                 self._cur.defects.append(Errors.NoBoundaryInMultipartDefect())
  266.                 lines = []
  267.                 for line in self._input:
  268.                     if line is NeedMoreData:
  269.                         yield NeedMoreData
  270.                         continue
  271.                     
  272.                     lines.append(line)
  273.                 
  274.                 self._cur.set_payload(EMPTYSTRING.join(lines))
  275.                 return None
  276.             
  277.             separator = '--' + boundary
  278.             boundaryre = re.compile('(?P<sep>' + re.escape(separator) + ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
  279.             capturing_preamble = True
  280.             preamble = []
  281.             linesep = False
  282.             while True:
  283.                 line = self._input.readline()
  284.                 if line is NeedMoreData:
  285.                     yield NeedMoreData
  286.                     continue
  287.                 
  288.                 if line == '':
  289.                     break
  290.                 
  291.                 mo = boundaryre.match(line)
  292.                 if mo:
  293.                     if mo.group('end'):
  294.                         linesep = mo.group('linesep')
  295.                         break
  296.                     
  297.                     if capturing_preamble:
  298.                         if preamble:
  299.                             lastline = preamble[-1]
  300.                             eolmo = NLCRE_eol.search(lastline)
  301.                             if eolmo:
  302.                                 preamble[-1] = lastline[:-len(eolmo.group(0))]
  303.                             
  304.                             self._cur.preamble = EMPTYSTRING.join(preamble)
  305.                         
  306.                         capturing_preamble = False
  307.                         self._input.unreadline(line)
  308.                         continue
  309.                     
  310.                     while True:
  311.                         line = self._input.readline()
  312.                         if line is NeedMoreData:
  313.                             yield NeedMoreData
  314.                             continue
  315.                         
  316.                         mo = boundaryre.match(line)
  317.                         if not mo:
  318.                             self._input.unreadline(line)
  319.                             break
  320.                             continue
  321.                     self._input.push_eof_matcher(boundaryre.match)
  322.                     for retval in self._parsegen():
  323.                         if retval is NeedMoreData:
  324.                             yield NeedMoreData
  325.                             continue
  326.                         
  327.                         break
  328.                     
  329.                     if self._last.get_content_maintype() == 'multipart':
  330.                         epilogue = self._last.epilogue
  331.                         if epilogue == '':
  332.                             self._last.epilogue = None
  333.                         elif epilogue is not None:
  334.                             mo = NLCRE_eol.search(epilogue)
  335.                             if mo:
  336.                                 end = len(mo.group(0))
  337.                                 self._last.epilogue = epilogue[:-end]
  338.                             
  339.                         
  340.                     else:
  341.                         payload = self._last.get_payload()
  342.                         if isinstance(payload, basestring):
  343.                             mo = NLCRE_eol.search(payload)
  344.                             if mo:
  345.                                 payload = payload[:-len(mo.group(0))]
  346.                                 self._last.set_payload(payload)
  347.                             
  348.                         
  349.                     self._input.pop_eof_matcher()
  350.                     self._pop_message()
  351.                     self._last = self._cur
  352.                     continue
  353.                 if not capturing_preamble:
  354.                     raise AssertionError
  355.                 preamble.append(line)
  356.             if capturing_preamble:
  357.                 self._cur.defects.append(Errors.StartBoundaryNotFoundDefect())
  358.                 self._cur.set_payload(EMPTYSTRING.join(preamble))
  359.                 epilogue = []
  360.                 for line in self._input:
  361.                     if line is NeedMoreData:
  362.                         yield NeedMoreData
  363.                         continue
  364.                         continue
  365.                 
  366.                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
  367.                 return None
  368.             
  369.             if linesep:
  370.                 epilogue = [
  371.                     '']
  372.             else:
  373.                 epilogue = []
  374.             for line in self._input:
  375.                 if line is NeedMoreData:
  376.                     yield NeedMoreData
  377.                     continue
  378.                 
  379.                 epilogue.append(line)
  380.             
  381.             if epilogue:
  382.                 firstline = epilogue[0]
  383.                 bolmo = NLCRE_bol.match(firstline)
  384.                 if bolmo:
  385.                     epilogue[0] = firstline[len(bolmo.group(0)):]
  386.                 
  387.             
  388.             self._cur.epilogue = EMPTYSTRING.join(epilogue)
  389.             return None
  390.         
  391.         lines = []
  392.         for line in self._input:
  393.             if line is NeedMoreData:
  394.                 yield NeedMoreData
  395.                 continue
  396.             
  397.             lines.append(line)
  398.         
  399.         self._cur.set_payload(EMPTYSTRING.join(lines))
  400.  
  401.     
  402.     def _parse_headers(self, lines):
  403.         lastheader = ''
  404.         lastvalue = []
  405.         for lineno, line in enumerate(lines):
  406.             if line[0] in ' \t':
  407.                 if not lastheader:
  408.                     defect = Errors.FirstHeaderLineIsContinuationDefect(line)
  409.                     self._cur.defects.append(defect)
  410.                     continue
  411.                 
  412.                 lastvalue.append(line)
  413.                 continue
  414.             
  415.             if lastheader:
  416.                 lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
  417.                 self._cur[lastheader] = lhdr
  418.                 lastheader = ''
  419.                 lastvalue = []
  420.             
  421.             if line.startswith('From '):
  422.                 if lineno == 0:
  423.                     mo = NLCRE_eol.search(line)
  424.                     if mo:
  425.                         line = line[:-len(mo.group(0))]
  426.                     
  427.                     self._cur.set_unixfrom(line)
  428.                     continue
  429.                 elif lineno == len(lines) - 1:
  430.                     self._input.unreadline(line)
  431.                     return None
  432.                 else:
  433.                     defect = Errors.MisplacedEnvelopeHeaderDefect(line)
  434.                     self._cur.defects.append(defect)
  435.             
  436.             i = line.find(':')
  437.             if i < 0:
  438.                 defect = Errors.MalformedHeaderDefect(line)
  439.                 self._cur.defects.append(defect)
  440.                 continue
  441.             
  442.             lastheader = line[:i]
  443.             lastvalue = [
  444.                 line[i + 1:].lstrip()]
  445.         
  446.         if lastheader:
  447.             self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
  448.         
  449.  
  450.  
  451.